Code
library(tidyverse)
library(ggtree)
library(ggtreeExtra)
library(ape)
library(ggnewscale)
library(RColorBrewer)
source("scripts/metadata_colors.R")library(tidyverse)
library(ggtree)
library(ggtreeExtra)
library(ape)
library(ggnewscale)
library(RColorBrewer)
source("scripts/metadata_colors.R")Paths
metadata_path <- "data/processed/metadata_ashton_desj_all_weavepop_H99.csv"
duplications_path <- "results/tables/duplications_putative.tsv" # Change it to final list of duplications when decided
merged_tree_path <- "data/processed/tree_merged.newick"
tree_merged_duplications_path <- "results/trees_dups/tree_merged_duplications.png"
tree_merged_duplications_12_13_path <- "results/trees_dups/tree_merged_duplications_12_13.png"
tree_merged_duplications_only_duplicated <- "results/trees_dups/tree_merged_duplications_only_duplicated.png"
tree_merged_duplications_only_duplicated2 <- "results/trees_dups/tree_merged_duplications_only_duplicated2.png"
tree_merged_duplications_only_duplicated3 <- "results/trees_dups/tree_merged_duplications_only_duplicated3.png"Load the necessary data
metadata <- read.csv(
metadata_path,
header = TRUE)Get one dataframe for each variable to be plotted as a separate metadata column in the tree
metadata$vni_subdivision <- factor(metadata$vni_subdivision,
levels = c("VNIa-4", "VNIa-5", "VNIa-32",
"VNIa-93", "VNIa-X", "VNIa-Y", "VNIb",
"VNIc", "VNIa-outlier"))
sublineage <- metadata %>%
filter(lineage == "VNI")%>%
select(strain, vni_subdivision)%>%
column_to_rownames("strain")%>%
droplevels()
lineage <- metadata %>%
select(strain, lineage)%>%
column_to_rownames("strain")
dataset <- metadata %>%
select(strain, dataset)%>%
column_to_rownames("strain")
source <- metadata %>%
select(strain, source)%>%
column_to_rownames("strain")duplications <- read.delim(
duplications_path,
sep = "\t", header = TRUE, stringsAsFactors = TRUE)duplications_full <- duplications %>%
select(strain, chromosome) %>%
distinct()Make matrix of duplicated chromosomes
dup_chroms <- duplications_full %>%
select(strain, chromosome)%>%
mutate(duplicated_full = 1)%>%
arrange(chromosome)%>%
pivot_wider(names_from = chromosome, values_from = duplicated_full, values_fill = 0)%>%
column_to_rownames("strain")%>%
mutate(across(everything(), ~ ifelse(. == 1, cur_column(),"Euploid")))
euploid_strain <- metadata %>%
filter(!strain %in% duplications_full$strain)%>%
select(strain)
for (chrom in colnames(dup_chroms)){
euploid_strain[chrom] <- "Euploid"
}
dup_chroms <- euploid_strain %>%
column_to_rownames("strain") %>%
bind_rows(dup_chroms)tree <- read.tree(merged_tree_path)Remove tips that are not in metadata$strain
tree <- drop.tip(tree, setdiff(tree$tip.label, metadata$strain))chrom_dup_colors <- c(chrom_colors, "Euploid" = "grey93")Subset the duplications_full data frame to only include strains with duplications of chromosomes 12 and 13
dup_chroms_12_13 <- dup_chroms %>%
select(chr12, chr13)keep_strains <- c(levels(duplications_full$strain), "H99", "Bt22", "Bt81")
tree_dups <- drop.tip(tree, setdiff(tree$tip.label, keep_strains))
sublineage <- sublineage %>%
filter(rownames(.) %in% keep_strains)%>%
droplevels()